# Load the six raw input tables from the working directory.
# `as.is = TRUE` (spelled out, because `T` is an ordinary variable that can be
# reassigned) keeps character columns as character vectors rather than
# converting them to factors.
ECG <- read.csv("JHUcomb.csv", as.is = TRUE)
SNP <- read.csv("icd.data.oct.11.2007.csv", as.is = TRUE)
AGE.GENDER <- read.csv("age.gender.csv", as.is = TRUE)
RACE <- read.csv("ReynRaceData-100207.csv", as.is = TRUE)
IND <- read.csv("firing.data.4.09.2008.csv", as.is = TRUE)
IMAGE <- read.csv("img.data11.19.07.csv", as.is = TRUE)
# Build the merge key ECG$ID from the first column of ECG.
#   - missing raw value            -> NA
#   - raw value starts with "."    -> "JHU" + characters 7-9 of the raw value
#   - otherwise                    -> "JHU" + characters 4-6 of the raw value
# Vectorised over the whole column, so it also works on a zero-row table
# (the former `seq(1, N.ECG)` loop iterated over c(1, 0) in that case).
N.ECG <- nrow(ECG)
raw_id <- ECG[[1]]
# First character position of the three-character subject number.
id_start <- ifelse(substring(raw_id, 1, 1) == ".", 7, 4)
# as.character() keeps the column character even when there are no rows.
ECG$ID <- as.character(
  ifelse(
    is.na(raw_id),
    NA,
    paste0("JHU", substring(raw_id, id_start, id_start + 2))
  )
)
# Recode the sentinel value -999 as NA in every column of ECG.
# Done one column at a time with a logical mask instead of indexing the data
# frame cell by cell: same result, but linear in the table size and safe on a
# zero-row table. Existing NAs are left untouched (the `!is.na()` guard keeps
# NA out of the subscript).
ECG[] <- lapply(ECG, function(column) {
  column[!is.na(column) & column == -999] <- NA
  column
})
# In columns 2-7 of SNP, treat the codes "UNDETERMINED", "-" and "ERROR" as
# missing, then assign the working column names.
# `%in%` never returns NA, so cells that are already NA are simply left as NA;
# the former `if (x == "..." || ...)` test stopped with
# "missing value where TRUE/FALSE needed" on such cells.
SNP[2:7] <- lapply(SNP[2:7], function(calls) {
  calls[calls %in% c("UNDETERMINED", "-", "ERROR")] <- NA
  calls
})
names(SNP) <- c("ID", "snp1", "snp2", "snp3", "snp4", "snp5", "snp6")
# Recode blank Gender entries ("") as NA.
# `%in%` is used instead of `==` so that entries which are already NA do not
# produce an NA condition (the former per-row `if` failed on them), and the
# vectorised form also works on a zero-row table.
AGE.GENDER$Gender[AGE.GENDER$Gender %in% ""] <- NA
# Keep only the race codes "A", "B", "W" and "O"; every other value becomes NA.
# `%in%` never returns NA, so entries that are already NA stay NA instead of
# aborting the script (the former `if (x != "A" && ...)` test failed on them).
RACE$Race[!(RACE$Race %in% c("A", "B", "W", "O"))] <- NA
# Give the first column of each table the common name "ID" so that the tables
# can later be merged on it.
colnames(AGE.GENDER)[1] <- "ID"
colnames(RACE)[1] <- "ID"
colnames(SNP)[1] <- "ID"
colnames(IND)[1] <- "ID"
colnames(IMAGE)[1] <- "ID"
# Clean the IND table:
#   - truncate every ID to its first six characters;
#   - normalise Inducible: "no, " becomes "no"; anything other than "no" or
#     "yes" becomes NA (values that are already NA stay NA).
# Vectorised, so it also works on a zero-row table (the former
# `seq(1, L)` loop iterated over c(1, 0) in that case).
L <- nrow(IND)
IND$ID <- substr(IND$ID, 1, 6)
IND$Inducible[IND$Inducible %in% "no, "] <- "no"
IND$Inducible[!(IND$Inducible %in% c("no", "yes"))] <- NA
# Tag every row of each source table with a constant 1. After an outer merge
# these columns are 1 where the ID was present in that table and NA elsewhere.
ECG$IDIN.ECG.IND <- rep(1, times = nrow(ECG))
SNP$IDIN.SNP.IND <- rep(1, times = nrow(SNP))
AGE.GENDER$IDIN.AGE.GENDER.IND <- rep(1, times = nrow(AGE.GENDER))
RACE$IDIN.RACE.IND <- rep(1, times = nrow(RACE))
IND$IDIN.IND.IND <- rep(1, times = nrow(IND))
# IMAGE carries two such columns; order of creation is kept (IDIN first).
IMAGE$IDIN.IMAGE.IND <- rep(1, times = nrow(IMAGE))
IMAGE$IMAGE.IND <- rep(1, times = nrow(IMAGE))
# Full outer join of all six tables on ID, folded left to right in the order
# ECG, SNP, AGE.GENDER, RACE, IND, IMAGE. The order matters: it determines
# which duplicated non-key columns receive the ".x" / ".y" suffixes.
# No intermediate data frames are left behind; only `d` is created.
d <- Reduce(
  function(merged, nxt) merge(merged, nxt, by = "ID", all = TRUE),
  list(ECG, SNP, AGE.GENDER, RACE, IND, IMAGE)
)
# Logical flags marking which rows have the data of each kind available.
# SNP.ALL.IND is TRUE only when all six SNP columns are non-missing.
d$SNP.ALL.IND <- !(is.na(d$snp1) | is.na(d$snp2) | is.na(d$snp3) |
                     is.na(d$snp4) | is.na(d$snp5) | is.na(d$snp6))
d$ECG.IND <- !is.na(d$QTVI_log)
d$AGE.IND <- !is.na(d$Birth.Year.x)
d$GENDER.IND <- !is.na(d$Gender.x)
d$RACE.IND <- !is.na(d$Race)
d$IND.IND <- !is.na(d$Inducible)
# Replaces any IMAGE.IND column already present in d.
d$IMAGE.IND <- !is.na(d$DEmass)
# Combine the two birth-year columns into one.
# Missing values in either column are first replaced by 0 (and stay 0 in d);
# Birth.Year then takes Birth.Year.x, falling back to Birth.Year.y where
# Birth.Year.x is 0. Rows where both are missing end up with Birth.Year 0.
d$Birth.Year.x <- replace(d$Birth.Year.x, is.na(d$Birth.Year.x), 0)
d$Birth.Year.y <- replace(d$Birth.Year.y, is.na(d$Birth.Year.y), 0)
d$Birth.Year <- d$Birth.Year.x + (d$Birth.Year.x == 0) * d$Birth.Year.y
# Keep rows with Birth.Year up to 1995 (this includes the 0 placeholders).
d <- d[d$Birth.Year <= 1995, ]
# Fixed reference date used below to measure time since implant.
today <- as.Date("3/04/2008", format = "%m/%d/%Y")

# Flag rows whose raw Implant.Date field is non-missing (evaluated before the
# field is parsed into a Date).
d$IMPLANT.IND <- !is.na(d$Implant.Date)

# Parse the month/day/year text fields into Date objects.
d$Firing.Date <- as.Date(d$Firings, format = "%m/%d/%Y")
d$Implant.Date <- as.Date(d$Implant.Date, format = "%m/%d/%Y")

# Diagnostic counts: rows flagged before parsing vs. rows with a parsed date.
sum(d$IMPLANT.IND)
sum(!is.na(d$Implant.Date))

# Time from implant to firing; FIRED.IND is TRUE where that interval exists.
d$Days.To.Firing <- d$Firing.Date - d$Implant.Date
d$FIRED.IND <- !is.na(d$Days.To.Firing)

# Time from implant to the fixed reference date.
d$Days.Of.Implant <- today - d$Implant.Date

# TRUE where AP.vs.IAP is "AP"; NA where AP.vs.IAP is missing.
d$APP.FIRED.IND <- (d$AP.vs.IAP == "AP")
# Write the full merged table, then the subset of rows with a non-missing
# Inducible value (IND.IND is TRUE), without row names.
# `FALSE` is spelled out because `F` is an ordinary variable that can be
# reassigned.
write.csv(d, file = "data.csv", row.names = FALSE)
dind <- d[d$IND.IND, ]
write.csv(dind, file = "data.ind.csv", row.names = FALSE)
# CLINICAL.IND is 1 when age, race and gender are all available, else 0
# (product of the three logical flags). It is added to both in-memory tables.
d$CLINICAL.IND <- with(d, AGE.IND * RACE.IND * GENDER.IND)
dind$CLINICAL.IND <- with(dind, AGE.IND * RACE.IND * GENDER.IND)

